{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Kernelized Classification\n", "\n", "Both perceptron and SVM can be kernelized, i.e. the output of the regressor can be passed through the sign function. \n", "\n", "$$ \\hat{y} = \\text{sign} \\left[ \\sum_{i=1}^N \\alpha_i y_i K(x_i, x) \\right] $$ and a perceptron loss applied to the difference between the predictor and the true value. \n", "\n", "The perceptron loss can be posed as:\n", "\n", "$$ L(\\alpha;x_j, y_j) = \\max \\left\\{0, -\\sum_{j=1}^N y_j \\alpha_i K(x_i, x_j) \\right\\} $$ \n", "\n", "The parameters are updated as follows: \n", "If $\\hat{y}(x) \\neq y(x)$, set $\\alpha \\gets \\alpha + \\eta_t$. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Code source: Sebastian Curi and Andreas Krause, based on Jaques Grobler (sklearn demos).\n", "# License: BSD 3 clause\n", "\n", "# We start importing some modules and running some magic commands\n", "% matplotlib inline\n", "% reload_ext autoreload\n", "% load_ext autoreload\n", "% autoreload 2\n", "\n", "# General math and plotting modules.\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "# Project files.\n", "from util import gradient_descent, generate_linear_separable_data, generate_circular_separable_data\n", "import plot_helpers\n", "from kernels import LinearKernel, PolynomialKernel, LaplacianKernel, GaussianKernel, PeriodicKernel\n", "from kernels import SumKernel\n", "from regularizers import L2Regularizer\n", "from classifiers import kNN\n", "\n", "# Widget and formatting modules\n", "import ipywidgets\n", "from ipywidgets import interact, interactive, interact_manual\n", "import pylab\n", "# If in your browser the figures are not nicely vizualized, change the following line. \n", "pylab.rcParams['figure.figsize'] = (10, 5)\n", "\n", "# Machine Learning library. 
\n", "import sklearn\n", "from sklearn import svm\n", "from sklearn import datasets\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "num_points = 100 # Number of points per class\n", "noise = 0.2 # Noise Level (needed for data generation).\n", "\n", "X, Y = generate_circular_separable_data(num_points, noise=noise, offset=1)\n", "fig = plt.subplot(111)\n", "opt = {'marker': 'r*', 'label': '+'}\n", "plot_helpers.plot_data(X[np.where(Y == 1)[0], 0], X[np.where(Y == 1)[0], 1], fig=fig, options=opt)\n", "opt = {'marker': 'bs', 'label': '-'}\n", "plot_helpers.plot_data(X[np.where(Y == -1)[0], 0], X[np.where(Y == -1)[0], 1], fig=fig, options=opt)\n", "fig.legend();\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def kernelized_perceptron(eta0, n_iter, batch_size, kernel, deg):\n", " if kernel == 'Linear':\n", " classifier = PolynomialKernel(X, Y, reg=0.00, deg=1, prediction=False)\n", " elif kernel == 'Polynomial':\n", " classifier = PolynomialKernel(X, Y, reg=0.00, deg=deg, prediction=False)\n", " elif kernel == 'Gaussian':\n", " classifier = GaussianKernel(X, Y, reg=0.00, bw=0.2, prediction=False) \n", " elif kernel == 'Laplacian':\n", " classifier = LaplacianKernel(X, Y, reg=0.00, bw=0.2, prediction=False) \n", " \n", " regularizer = L2Regularizer(0.)\n", " \n", " alpha0 = 0 * np.random.randn(X.shape[0])\n", "\n", " opts = {'eta0': eta0,\n", " 'n_iter': n_iter,\n", " 'batch_size': batch_size,\n", " 'n_samples': X.shape[0],\n", " 'algorithm': 'SGD',\n", " 'learning_rate_scheduling': None\n", " }\n", " alphas, indexes = gradient_descent(alpha0, classifier, regularizer, opts)\n", "\n", " fig = plt.subplot(111)\n", " opt = {'marker': 'r*', 'label': '+'}\n", " plot_helpers.plot_data(X[np.where(Y == 1)[0], 0], X[np.where(Y == 1)[0], 1], fig=fig, options=opt)\n", " opt = {'marker': 'bs', 'label': '-'}\n", " plot_helpers.plot_data(X[np.where(Y == -1)[0], 0], X[np.where(Y == -1)[0], 1], fig=fig, options=opt)\n", "\n", " contour_opts = {'n_points': 20, 'x_label': '$x$', 'y_label': '$y$', 'sgd_point': False, 'n_classes': 2}\n", " opts = {'contour_opts': contour_opts}\n", " plot_helpers.classification_progression(X, Y, alphas, indexes, classifier, contour_plot=fig, options=opts)\n", "\n", "interact_manual(kernelized_perceptron,\n", " eta0=ipywidgets.FloatSlider(value=0.5,\n", " min=1e-1,\n", " max=2,\n", " step=1 * 1e-1,\n", " readout_format='.1f',\n", " description='Learning rate:',\n", " style={'description_width': 'initial'},\n", " continuous_update=False),\n", " n_iter=ipywidgets.IntSlider(value=20,\n", " min=5,\n", " max=50,\n", " step=1,\n", " description='Number of iterations:',\n", " style={'description_width': 'initial'},\n", " continuous_update=False),\n", " batch_size=ipywidgets.IntSlider(value=X.shape[0],\n", " min=1,\n", " max=X.shape[0],\n", " step=1,\n", " description='Batch Size:',\n", " style={'description_width': 'initial'},\n", " continuous_update=False),\n", " kernel=ipywidgets.RadioButtons(\n", " options=['Polynomial', 'Gaussian', 'Laplacian'],\n", " value='Polynomial',\n", " description='Kernel type:',\n", " style={'description_width': 'initial'}),\n", " deg = ipywidgets.IntSlider(\n", " value=1,\n", " min=1,\n", " max=10, \n", " step=1,\n", " description='Degree of Polynomial kernel:',\n", " style={'description_width': 'initial'}),\n", " );" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## SVM KERNELS\n", "The SVM can also be kernelized! let's look at an example. 
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "def laplacian_kernel(X, Y, bw):\n",
 "    \"\"\"A hand-rolled Laplacian kernel, passed to SVC as a callable.\n",
 "    Since it is plain (uncompiled) Python, it runs a bit slower than the built-in kernels.\"\"\"\n",
 "    rows = X.shape[0]\n",
 "    cols = Y.shape[0]\n",
 "    K = np.zeros((rows, cols))\n",
 "    for col in range(cols):\n",
 "        dist = bw * np.linalg.norm(X - Y[col, :], ord=1, axis=1)\n",
 "        K[:, col] = np.exp(-dist)\n",
 "    return K\n",
 "\n",
 "# Our dataset and targets.\n",
 "n_samples = 200  # Total number of points.\n",
 "tol = 1e-1\n",
 "\n",
 "def kernelized_svm(dataset, kernel, reg, bw, deg, noise):\n",
 "    if dataset == 'blobs':\n",
 "        X, Y = datasets.make_blobs(n_samples=n_samples, centers=2, random_state=3, cluster_std=10 * noise)\n",
 "    elif dataset == 'circles':\n",
 "        X, Y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=noise, random_state=42)\n",
 "    elif dataset == 'moons':\n",
 "        X, Y = datasets.make_moons(n_samples=n_samples, noise=noise, random_state=42)\n",
 "    elif dataset == 'xor':\n",
 "        np.random.seed(42)\n",
 "        step = int(n_samples / 4)\n",
 "\n",
 "        X = np.zeros((n_samples, 2))\n",
 "        Y = np.zeros(n_samples)\n",
 "\n",
 "        X[0 * step:1 * step, :] = noise * np.random.randn(step, 2)\n",
 "        Y[0 * step:1 * step] = 1\n",
 "        X[1 * step:2 * step, :] = np.array([1, 1]) + noise * np.random.randn(step, 2)\n",
 "        Y[1 * step:2 * step] = 1\n",
 "\n",
 "        X[2 * step:3 * step, :] = np.array([0, 1]) + noise * np.random.randn(step, 2)\n",
 "        Y[2 * step:3 * step] = -1\n",
 "        X[3 * step:4 * step, :] = np.array([1, 0]) + noise * np.random.randn(step, 2)\n",
 "        Y[3 * step:4 * step] = -1\n",
 "\n",
 "    elif dataset == 'periodic':\n",
 "        np.random.seed(42)\n",
 "        step = int(n_samples / 4)\n",
 "\n",
 "        X = np.zeros((n_samples, 2))\n",
 "        Y = np.zeros(n_samples)\n",
 "\n",
 "        X[0 * step:1 * step, :] = noise * np.random.randn(step, 2)\n",
 "        Y[0 * step:1 * step] = 1\n",
 "        X[1 * step:2 * step, :] = np.array([0, 2]) + noise * np.random.randn(step, 2)\n",
 "        Y[1 * step:2 * step] = 1\n",
 "\n",
 "        X[2 * step:3 * step, :] = np.array([0, 1]) + noise * np.random.randn(step, 2)\n",
 "        Y[2 * step:3 * step] = -1\n",
 "        X[3 * step:4 * step, :] = np.array([0, 3]) + noise * np.random.randn(step, 2)\n",
 "        Y[3 * step:4 * step] = -1\n",
 "\n",
 "    X = X[Y <= 1, :]\n",
 "    Y = Y[Y <= 1]\n",
 "    Y[Y == 0] = -1\n",
 "\n",
 "    # Add a constant 1 feature.\n",
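 "    # (Appending a column of ones is the homogeneous-coordinates trick: the kernel then\n",
 "    # acts on an augmented input, so e.g. the polynomial kernel with coef0=0 gains an\n",
 "    # implicit offset and the boundary is not forced through the origin.)\n",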
\n", " X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)\n", " plot_support = True\n", " if kernel == 'poly':\n", " gamma = 1\n", " coef0 = 0\n", " elif kernel == 'sigmoid':\n", " gamma = np.power(10., bw)\n", " coef0 = 0\n", " elif kernel == 'rbf':\n", " gamma = np.power(10., -bw)\n", " coef0 = 0\n", " elif kernel == 'laplacian':\n", " gamma = np.power(10., -bw)\n", " coef0 = 0\n", " kernel = lambda X, Y: laplacian_kernel(X, Y, gamma)\n", " plot_support = False\n", " \n", " classifier = svm.SVC(kernel=kernel, C=np.power(10., -reg), gamma=gamma, degree=deg, coef0=coef0, tol=tol)\n", " classifier.fit(X, Y)\n", "\n", " # plot the line, the points, and the nearest vectors to the plane\n", " plt.figure()\n", " plt.clf()\n", " fig = plt.axes()\n", " opt = {'marker': 'r*', 'label': '+'}\n", " plot_helpers.plot_data(X[np.where(Y == 1)[0], 0], X[np.where(Y == 1)[0], 1], fig=fig, options=opt)\n", " opt = {'marker': 'bs', 'label': '-'}\n", " plot_helpers.plot_data(X[np.where(Y == -1)[0], 0], X[np.where(Y == -1)[0], 1], fig=fig, options=opt)\n", " \n", " if plot_support:\n", " plt.scatter(classifier.support_vectors_[:, 0], classifier.support_vectors_[:, 1], s=80,\n", " facecolors='none', edgecolors='k')\n", "\n", " mins = np.min(X, 0)\n", " maxs = np.max(X, 0)\n", " x_min = mins[0] - 1\n", " x_max = maxs[0] + 1\n", " y_min = mins[1] - 1\n", " y_max = maxs[1] + 1\n", "\n", " XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j] \n", " Xtest = np.c_[XX.ravel(), YY.ravel(), np.ones_like(XX.ravel())]\n", " # Xtest = np.c_[XX.ravel(), YY.ravel()]\n", " Z = classifier.decision_function(Xtest)\n", "\n", " # Put the result into a color plot\n", " Z = Z.reshape(XX.shape)\n", " # plt.figure(fignum, figsize=(4, 3))\n", " plt.contourf(XX, YY, Z > 0, cmap=plt.cm.jet, alpha=0.3)\n", " plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'], levels=[-.99, 0, .99])\n", "\n", " plt.xlim(x_min, x_max)\n", " plt.ylim(y_min, y_max)\n", "\n", "# plt.xticks(())\n", "# plt.yticks(())\n", "\n", "interact_manual(kernelized_svm, \n", " dataset=['blobs', 'circles', 'moons', 'xor', 'periodic'],\n", " kernel=['poly', 'rbf', 'laplacian'], \n", " reg=ipywidgets.FloatSlider(value=-3,\n", " min=-3,\n", " max=3,\n", " step=0.5,\n", " readout_format='.1f',\n", " description='Regularization 10^:',\n", " style={'description_width': 'initial'},\n", " continuous_update=False),\n", " bw=ipywidgets.FloatSlider(value=-1,\n", " min=-3,\n", " max=3,\n", " step=0.1,\n", " readout_format='.1f',\n", " description='Bandwidth 10^:',\n", " style={'description_width': 'initial'},\n", " continuous_update=False), \n", " deg=ipywidgets.IntSlider(\n", " value=1,\n", " min=1,\n", " max=10, \n", " step=1,\n", " description='Degree of Poly:',\n", " style={'description_width': 'initial'}),\n", " noise=ipywidgets.FloatSlider(value=0.05,\n", " min=0.01,\n", " max=0.3,\n", " step=0.01,\n", " readout_format='.2f',\n", " description='Noise level:',\n", " style={'description_width': 'initial'},\n", " continuous_update=False), \n", " );" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## KNN\n", "\n", "The K-Nearest Neighbors classifier is very easy. 
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "noise = 0.4\n",
 "X, Y = generate_circular_separable_data(num_points, noise=noise, offset=1)\n",
 "\n",
 "def change_k_nn(k):\n",
 "    classifier = kNN(X, Y, k)\n",
 "    fig = plt.subplot(111)\n",
 "    opt = {'marker': 'r*', 'label': '+'}\n",
 "    plot_helpers.plot_data(X[np.where(Y == 1)[0], 0], X[np.where(Y == 1)[0], 1], fig=fig, options=opt)\n",
 "    opt = {'marker': 'bs', 'label': '-'}\n",
 "    plot_helpers.plot_data(X[np.where(Y == -1)[0], 0], X[np.where(Y == -1)[0], 1], fig=fig, options=opt)\n",
 "\n",
 "    opt = {'n_points': 20, 'x_label': '$x$', 'y_label': '$y$'}\n",
 "    plot_helpers.plot_classification_boundaries(X, classifier, fig=fig, options=opt)\n",
 "\n",
 "interact(change_k_nn,\n",
 "         k=ipywidgets.IntSlider(value=1, min=1, max=9, step=1, description='k',\n",
 "                                style={'description_width': 'initial'}, continuous_update=False)\n",
 "         );\n" ] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }